********************************************************************************
* MANAGE HUT RAW DATA 
* File description: Cleans the raw delivery from SCB of the HUT survey,
* deflates all income and expenditure variables to 2016 prices, and harmonizes registers between years
********************************************************************************
                           	
* Start from an empty session and switch off output paging.
clear all
set more off

* CPI deflators, one local per survey year (kpi<year>), listed chronologically.
* Each monetary variable is multiplied by the factor for its survey year
* further down in this file. The values are stored as copied text (no "=")
* so that no digits are lost to numeric formatting.
local kpi2003 1.13782811938152
local kpi2004 1.1333452722063
local kpi2005 1.12849500713267
local kpi2006 1.11332770389135
local kpi2007 1.08922240198272
local kpi2008 1.05262632646951
local kpi2009 1.05596342521524
local kpi2010 1.04274039412114
local kpi2011 1.0160549722249
local kpi2012 1.00709739019733


*Household data

* Clean each yearly HUT household file: harmonise variable names, deflate
* monetary variables with the CPI factor of the survey year, build the
* consumption groups and store the result in a tempfile named hut<year>.
foreach i of numlist 2003/2009 2012 {

	use "raw/HUT/hut`i'_hushall.dta", clear

	duplicates drop

	gen year=`i'

	* Harmonise the names of the household id and disposable income.
	* The loop macro is tested directly: "if year..." would only look at the
	* first observation of the variable year.
	if `i'<=2003 {
		rename cdispz dispinc_hut
	}
	else if `i'==2004 {
		rename BIDNRK bidnrk
		rename CDISPZ dispinc_hut
	}
	else {
		rename BIDNRK bidnrk
		rename CDISP04z dispinc_hut
	}

	* Variables to deflate. Each name must appear exactly once: the earlier
	* list repeated U12, U1211, U1231, U24 and U30, which multiplied them by
	* the CPI factor twice. "list uniq" guards against future duplicates.
	local deflate_vars dispinc_hut totrad U01 U12 U1211 U1231 U02 U021 U022 U023 ///
		U24 U30 U03 U031 U032 U04 U041 U043 U044 U045 U05 U056 U06 U07 U071 ///
		U0711 U07111 U07112 U0722 U0723 U073 U0731 U0732 U0733 U08 U083110101 ///
		U083110102 U0831102 U09 U091 U0911 U0922 U0931 U0932 U094 U0942 U095 ///
		U10 U11 U1111101 U1111102 U11112 U112 U1213103 U1254101 U21 U22 U23 ///
		U25 U266
	local deflate_vars : list uniq deflate_vars

	foreach var of local deflate_vars {
		replace `var'=`var'*`kpi`i''
	}

	******************Rename and create consumption groups**************************

	* NOTE(review): the label define statements below only create value
	* labels; nothing in this loop attaches them with label values. The texts
	* for home and household_type also look swapped - confirm against the
	* SCB codebook before attaching them.

	rename BANTBRNZ no_kids
	label var no_kids "number of kids"

	rename BANTVUXZ no_adults
	label var no_adults "number of adults"

	rename BBOFOR home
	label var home "type of home"
	label define home 1 "" 2 "married" 3 "single_co" 4 "married_co" /*co means living with other people*/

	rename BEGSZ household_type
	label define household_type 1 "lodger" 2 "service housing" 3 "rental" 4 "apartment" 5 "house" 6 "own apartment"

	rename BCDZ decile
	label var decile "deciles"

	rename BHREG region
	label define region 1 "Stockholm" 3 "Large cities" 4 "South" 5 "North urban" 6 "North rural" 8 "Gothenburg" 9 "Malmoe"
	label var region "Urban or Rural"

	* BKEZ is renamed under capture, so a file without it does not stop the run.
	capture rename BKEZ consumption_units
	capture label var consumption_units "ke-scale"

	rename BKVIZ quintile

	rename BKVZ quartile

	rename BTOTINDZ ind
	label var ind "total number of individuals in household"

	rename BVIKT weight

	* bm_nondur must be built before U1211 and U1231 are renamed below.
	g bm_nondur					= U12-U1211-U1231+U21+U24+U30
	rename totrad totexp
	rename U10 education_cost
	gen food_home=U01+U1111102
	rename U021 alcohol
	rename U022 cigarettes
	rename U03 clothing
	rename U041 housing
	gen home_util=U044+U045
	rename U05 furniture
	rename U06	health
	rename U071	cars
	rename U0722 gas
	rename U0723 car_repair
	gen public_transport=U0731+U0732
	rename U0733 airtravel
	rename U083110101 homephone
	gen cellphone=U083110102+U0831102
	gen recreation_1=U091+U0922+U0932
	gen books=U0931+U095
	rename U094 recreation_2
	gen food_out=U1111101+U11112
	rename U112	hotels
	rename U1211 barbers
	rename U1231 jewelry
	rename U1254101	car_insurance

	*Generating categories following Bertrand and Morse (2016)

	gen bm_clothing=clothing+jewelry
	label var bm_clothing "Category 1"

	gen bm_housing=housing
	label var bm_housing "Category 2"

	gen bm_foodhome=food_home
	label var bm_foodhome "Category 3"

	gen bm_foodout=food_out
	label var bm_foodout "Category 4"

	gen bm_alcohol=alcohol+cigarettes
	label var bm_alcohol "Category 5"

	gen bm_personal_care=barbers
	label var bm_personal_care "Category 6"

	gen bm_communication_media=cellphone+homephone+books
	label var bm_communication_media "Category 7"

	gen bm_entertainment=recreation_2
	label var bm_entertainment "Category 8"

	gen bm_utilities=home_util
	label var bm_utilities "Category 9"

	gen bm_othertransport=airtravel+public_transport+car_insurance+car_repair+gas+hotels
	label var bm_othertransport "Category 10"

	* Full variable name instead of the abbreviation educ, so the line does
	* not depend on variable abbreviation being on and unambiguous.
	gen bm_healtheduc=health+education_cost
	label var bm_healtheduc "Category 11"

	gen bm_furn=furniture
	label var bm_furn "Category 13"

	gen bm_entertainmentdur=recreation_1
	label var bm_entertainmentdur "Category 14"

	gen bm_vehicles=cars
	label var bm_vehicles "Category 15"

	* Remaining raw expenditure items are no longer needed.
	drop U*

	tempfile hut`i'
	save `hut`i''

}

* Stack the household files. The 2012 file is still in memory after the
* loop above, so only the 2003-2009 tempfiles need to be appended to it.
forvalues yr = 2003/2009 {
	append using `hut`yr''
}

* Remove rows that are identical on every variable, then store the panel.
duplicates drop

save "use/hut_2003_2012", replace

*** Individual data

* For each yearly HUT individual file, keep the rows with bkf==1 (household
* heads) and the linking ids, and store the result in a tempfile named
* hut<year>.
foreach i of numlist 2003/2009 2012 {

	* The clear option makes loading independent of whether the data
	* currently in memory have been saved (same as the household loop).
	use "raw/HUT/hut`i'_individ.dta", clear

	gen year=`i'

	* Harmonise variable names across waves. The loop macro is tested
	* directly: "if year..." would only look at the first observation.
	if `i'>=2004 {
		rename BIDNRK bidnrk
	}

	if `i'>=2005 {
		rename BKF bkf
	}

	*Keep only heads
	keep if bkf==1

	keep bidnrk LopNr year

	tempfile hut`i'

	save `hut`i''
}

* Stack the head-of-household files. The 2012 file is still in memory after
* the loop above, so only the 2003-2009 tempfiles are appended to it.
forvalues yr = 2003/2009 {
	append using `hut`yr''
}

duplicates drop

* Attach the household data to each head by household id and year.
* NOTE(review): no keep() option is given, so unmatched rows from either
* side stay in the data, and the _merge variable is written to the saved
* file. Rows that exist only in the household file have no LopNr - confirm
* this is intended, since later steps merge on LopNr and year.
merge 1:1 bidnrk year using "use/hut_2003_2012"

* Overwrites the household panel with the linked version.
save "use/hut_2003_2012", replace


* Load the LISA register files for 2012 and 2003-2009 into one dataset.
* The use command has no replace option (it stops with "option replace not
* allowed"); clear is the option that discards the data in memory.
use "use/LISA/Lisa_2012.dta", clear

foreach i of numlist 2003/2009 {
	append using "use/LISA/Lisa_`i'.dta"
}

* Keep one row per person and year. With force, the surplus rows are dropped
* even if they differ on other variables.
duplicates drop LopNr year, force

* Municipality-by-year aggregates computed on the full LISA data.

* Percentiles of dispinch within municipality and year. The list order
* fixes the order in which the variables are created.
foreach p in 90 80 50 20 25 75 95 {
	bysort muni year: egen p`p'_muni = pctile(dispinch), p(`p')
}

bysort muni year: egen share_married = mean(married)

* Temporary indicator for educ of 11 or below; a missing educ counts as 0.
generate low_educ = (educ<=11)
bysort muni year: egen share_loweduc = mean(low_educ)
drop low_educ

bysort muni year: egen share_female = mean(female)

* Indicator for dispinch below 60 percent of the municipality-year median.
generate poor = (dispinch<(0.6*p50_muni))
bysort muni year: egen share_poor = mean(poor)

bysort muni year: egen avg_kids = mean(kids)
bysort muni year: egen avg_age = mean(age)

bysort muni year: egen share_unemployed = mean(unemployed)
bysort muni year: egen share_selfemployed = mean(selfemployed)

* Number of rows in each municipality-year cell.
bysort muni year: generate population = _N

* Adjust for a municipality code change before linking on municipality.
* NOTE(review): code 1917 is mapped to 330. Presumably this is Heby, whose
* current code is believed to be 0331 (0330 being Knivsta) - verify against
* the coding used in raw/houseprices.
recode municipality (1917 = 330)

* Each merge keeps matched rows only and does not create _merge.
* House prices by municipality and year.
merge m:1 municipality year using "raw/houseprices", keep(match) nogenerate

* HUT survey data, linked by person id and year.
merge 1:1 LopNr year using "use/hut_2003_2012", keep(match) nogenerate

* yitz_hut file, linked by person id and year.
merge 1:1 LopNr year using "use/yitz/yitz_hut", keep(match) nogenerate

* Drop households with negative disposable income (missing values are kept,
* since a missing value is not less than zero).
drop if dispinc_hut<0

gen age2=age^2

*Create variables for analysis

*Create savings
gen savings=dispinc_hut-totexp
* The log is missing for savings of zero or below; asinh is defined for all values.
gen lnsavings=log(savings)
gen ihssavings=asinh(savings)

*Create consumption categories
*Based on visibility scores above 0.51
* Generated directly: the result is missing when any component is missing,
* exactly as with the former "gen ... =0" followed by "replace".
gen visible_consumption=bm_vehicles+bm_clothing+bm_entertainmentdur+bm_alcohol+bm_foodout+bm_personal_care+bm_furn+bm_entertainment

gen non_visible_consumption=bm_nondur+bm_utilities+bm_healtheduc+bm_othertransport+bm_housing+bm_communication_media+bm_foodhome

g visible_share=visible_consumption/totexp

gen lndispinc_hut=log(dispinc_hut)
* Squared log income, by analogy with age2. The former log(dispinc_hut^2)
* equals 2*log(dispinc_hut) and is perfectly collinear with lndispinc_hut.
gen lndispinc_hut2=lndispinc_hut^2
* Rescale yitz_muni by 100000.
replace yitz_muni=yitz_muni/100000
gen lndispinch=log(dispinch)
gen lnp50_muni=log(p50_muni)
gen lnp20_muni=log(p20_muni)
gen lnhouseprice=log(houseprices)

save "use/final_hut", replace


